# -*- coding: utf-8 -*-
import scrapy
from scrapy.selector import Selector
from scrapy.loader import ItemLoader, Identity

from ComicScra.items import CsdnItem


class TestcsdnSpider(scrapy.Spider):
    """Scrape the articles linked from a CSDN blog index page.

    ``parse`` handles the start URL and schedules one request per article
    link found on it; ``parse_item`` turns each article page into a
    ``CsdnItem``. Only article links are requested, so pagination of the
    index is not followed.
    """

    name = 'testcsdn'
    # NOTE: allowed_domains is not set, so requests are not restricted to a
    # particular domain. If it is ever enabled, entries should be bare domain
    # names (e.g. 'blog.csdn.net'), not full URLs.
    start_urls = ['http://blog.csdn.net/zzg_550413470/']

    def parse(self, response):
        """Yield one request per article link found on the index page.

        Args:
            response: The downloaded index page.

        Yields:
            scrapy.Request: A request for each article URL, handled by
            ``parse_item``.
        """
        links = response.xpath(
            "//h1/span[@class='link_title']/a/@href").extract()
        for link in links:
            # urljoin resolves relative hrefs against the page URL and leaves
            # absolute hrefs untouched; prefixing the host by hand would
            # produce a malformed URL for an already-absolute href.
            yield scrapy.Request(response.urljoin(link),
                                 callback=self.parse_item)

    def parse_item(self, response):
        """Build a ``CsdnItem`` from a single article page.

        Args:
            response: The downloaded article page.

        Returns:
            CsdnItem: ``title`` and ``content`` hold the lists of text nodes
            matched by their XPath expressions (empty lists when nothing
            matches); ``url`` holds the URL of the response.
        """
        item = CsdnItem()
        item["title"] = response.xpath(
            "//h1/span[@class='link_title']/a/text()").extract()
        # Only text that sits directly inside <p> children of the markdown
        # container is collected.
        item["content"] = response.xpath(
            "//div[@id='article_content']/div[@class='markdown_views']/p/text()"
        ).extract()
        item["url"] = response.url
        return item